#!/usr/bin/env python
# encoding: utf-8
from bs4 import BeautifulSoup
from spider import Spider
from console_pipeline import ConsolePipeline


class MyProcess(object):
    """Page processor for the smzdm.com home page.

    Holds the crawl entry URL and HTTP headers as class attributes and turns
    a downloaded page into ``{"link", "title"}`` dicts via ``process``.
    """

    # Entry URL; presumably read by Spider to start the crawl -- confirm
    # against the spider module.
    start_url = "https://www.smzdm.com/"
    # Android Chrome User-Agent; presumably sent by Spider with its requests
    # -- confirm against the spider module.
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19'
    }

    def process(self, html):
        """Yield one ``{"link": ..., "title": ...}`` dict per list entry.

        Prints the page title when the document has one, then walks every
        ``<li>`` found under the element whose id is ``J_scroll_ul``.
        Entries lacking an ``<a>`` with an ``href`` or a ``zm-card-title``
        element are skipped instead of aborting the whole page.

        Args:
            html: Markup of the downloaded page, as accepted by
                ``BeautifulSoup``.

        Yields:
            dict: ``"link"`` is the ``href`` of the first ``<a>`` in the
            entry; ``"title"`` is the text of its ``zm-card-title`` element.
        """
        soup = BeautifulSoup(html, 'lxml')

        # Attribute-style navigation returns None for a missing tag, so each
        # step has to be checked before going deeper.
        head = soup.head
        page_title = head.title if head is not None else None
        if page_title is not None:
            print(page_title.text)

        container = soup.find(id='J_scroll_ul')
        if container is None:
            # No listing container in this document: nothing to extract.
            return

        for element in container.find_all("li"):
            anchor = element.a
            title_tag = element.find(class_='zm-card-title')
            if anchor is None or title_tag is None:
                continue
            # .get() returns None for a missing attribute instead of raising.
            link = anchor.get('href')
            if link is None:
                continue
            yield {"link": link, "title": title_tag.text}


if __name__ == "__main__":
    # Build the spider around the page processor, attach the console
    # pipeline, then start whatever object addPipeline hands back.
    crawler = Spider(MyProcess())
    runnable = crawler.addPipeline(ConsolePipeline())
    runnable.start()
